library(tidyverse)
library(ggsci) # scale_color_npg
library(tools) # file_path_sans_ext
library(phyloseq)
library(microbiome)
library(eulerr) # venn diagram
library(ggpubr) # as_ggplot
library(patchwork) # arrange multiple plots
library(vegan) # multi-response permutation procedure (MRPP) and adonis
library(hrbrthemes) # scale_y_percent
library(DESeq2)
library(ComplexHeatmap) # heatmap
# heatmaptree
library(ggtree)
library(treeio)

library(circlize) # colorRamp2
library(ggfortify) # pca
library(ggsignif) # geom_signif

# ecological network
library(SpiecEasi)
library(igraph)
library(rsvg) # read svg
library(grImport2) # read svg
library(grid)
library(furrr)
library(writexl) # write table s6
sourceDir <- function(path, trace = TRUE, ...) {
  for (nm in list.files(path, pattern = "[.][RrSsQq]$")) {
    if (!grepl("visualize_save_network", nm, fixed = TRUE)) {
      if (trace) cat(nm,":")      
      source(file.path(path, nm), ...)
      if (trace) cat("\n")
    } else {
      message("Do not source visualize_save_network_RCy3.R")
    }
  }
}
sourceDir("src/R/")
## compare_groups.R :
## ecological_network.R :
## generate_venn_diagram.R :
## get_unique_genus.R :
## plot_composition.R :
## plot_rarecurve.R :

Effects of HA on core body temperature and weight

To confirm the protective role of HA, the rectal temperature (Tre), a higher, more reliable and more stable method to measure core body temperature, was assessed and compared between HA and CR subjects. Supplementary Figure S1 shows that the Tre profile of HA subjects significantly increased in response to the first and second weeks of heat exposure followed by a slow and stable change to a Tre that was significantly decreased to the same level as before exposure.

tre <- read_tsv("data/raw/tre.tsv")
p_s1a <- ggplot(tre) +
  stat_summary(
    fun.y = "mean", aes(x = days, y = rt, color = state), 
    geom = "point", size = 3) +
  stat_summary(fun.y = "mean", aes(x = days, y = rt, color = state), 
    geom = "line") +
  labs(x = "Days", y = "Rectal temperature (°C)") +
  guides(color = guide_legend(title = NULL)) +
  # scale_x_date(date_breaks = "2 day", date_labels = "%m/%d") +
  theme_bw() +
  scale_color_manual(values = c("#4DBBD5FF", "#E64B35FF")) +
  scale_x_continuous(
    breaks = seq.int(0, 36, by = 2),
    labels = seq.int(0, 36, by = 2)
  ) +
  theme(legend.position = c(0.9, 0.8))

weight <- read_tsv("data/raw/weight.tsv")
p_s1b <- ggplot(weight) +
  stat_summary(
    fun.y = "mean", aes(x = days, y = weight, color = state), 
    geom = "point", size = 3) +
  stat_summary(fun.y = "mean", aes(x = days, y = weight, color = state), 
    geom = "line") +
  labs(x = "Days", y = "Body Weight (grams)") +
  guides(color = guide_legend(title = NULL)) +
  # scale_x_date(date_breaks = "2 day", date_labels = "%m/%d") +
  theme_bw() +
  scale_color_manual(values = c("#4DBBD5FF", "#E64B35FF")) +
  scale_x_continuous(
    breaks = seq.int(0, 35, by = 7),
    labels = seq.int(0, 35, by = 7)
  ) +
  ylim(200, 400) +
  scale_y_continuous(
    breaks = seq.int(200, 400, by = 50),
    labels = seq.int(200, 400, by = 50)
  )  +
  theme(legend.position = "none")

p_s1 <- p_s1a + p_s1b + plot_layout(nrow = 1, width = c(1, 1)) + plot_annotation(tag_levels = "A")
ggsave("output/figure/figure_s1.pdf", p_s1, width = 8, 
  height = 4)
p_s1
**Supplementary Figure S1.** Changes in rectal temperature during 28 days of heat exposure.

Supplementary Figure S1. Changes in rectal temperature during 28 days of heat exposure.

Quality control of 16S rRNA microbiome profilling

Reads summary

To assess the changes of the gut microbiota in HA, the gut microbiota was sequenced on 1 day before HA (day 0), and days 14 and 28 in both HA and CR subjects.

Quality filtering on the raw reads were performed under specific filtering conditions to obtain the high-quality clean reads according to the Cutadapt quality controlled process. Chimera sequences were identified and removed using UCHIME algorithm.

reads_sample_name <- c(
  paste0("HA.", rep(c("28.", "14.", "0."), each = 8), c(1:4, 6:9)),
  paste0("CR.", rep(c("28.", "14.", "0."), each = 8), c(1, 3:7, 9, 10))
)

reads_summary <- read_tsv("data/raw/reads_summary.txt") %>%
  filter(`#Sample_name` %in% reads_sample_name) %>% # select 48 samples
  mutate(`#Sample_name` = ifelse(str_detect(`#Sample_name`, "^HA"), # sample name transform
    str_replace(`#Sample_name`, "^HA", "CR"),
    str_replace(`#Sample_name`, "^CR", "HA")
  ))

# write_tsv(reads_summary, "data/raw/reads_summary_new.txt")

# total reads
total_reads <- sum(reads_summary$`Raw_reads(#)`)
total_reads
## [1] 4062872
# high quality reads
effective_reads <- sum(reads_summary$`Clean_Reads(#)`)
effective_reads
## [1] 3824443
# mean reads per sample
effective_reads / 48
## [1] 79675.9
# range of reads across samples
range(reads_summary$`Clean_Reads(#)`)
## [1] 60593 82017

A total of 4062872 reads were obtained which reduced to 3824443 reads after quality filtering (chimera) corresponding to a mean of 79675 reads per sample (ranging from 60593 to 82017).

OTU comparison

A total of 1374 OTUs were identified which has a minimum count of 0.0005 of total reads across samples.

qiime_otu_table <- import_biom(
  BIOMfilename = "data/raw/qiime/otu_table_mc2_w_tax_no_pynast_failures.biom",
  treefilename = "data/raw/qiime/rep_set.tre"
)

# set the taxonomic ranks manually
colnames(tax_table(qiime_otu_table)) <- c(
  "Kingdom", "Phylum", "Class", "Order",
  "Family", "Genus", "Species"
)

mapping <- import_qiime_sample_data(mapfilename = "data/raw/mapping_new.txt")
ps_initial <- merge_phyloseq(qiime_otu_table, mapping)

# filter low confidence otu
# filter low confidence otu
otu_reads <- taxa_sums(ps_initial)
big_otu <- otu_reads[otu_reads/sum(otu_reads) > 0.00005]
ps_big <- prune_taxa(names(big_otu), ps_initial)

# 1374 OTUs
ntaxa(ps_big)
## [1] 1374

As we expected, no significant differences were found between CR and HA on day 0 by comparing the number of OTUs (1038 and 1071 OTUs; P = 0.6, Wilcoxon sum test; Supplementary Figure S2A, C). The number of OTUs was significant different between CR and HA on day 28 (1207 and 1279 OTUs, respectively; P = 0.014, Wilcoxon sum test) rather and day 14 (1313 and 1295 OTUs; P = 0.19, Wilcoxon sum test). (Supplementary Figure S2A, D, E).

The averaged coverage and subsampling were sufficient to describe gut bacterial communities according to sequencing-based rarefaction curves (Supplementary Figure S2B).

p_s2b <- plot_rarecurve(ps_big) + labs(y = "Number of OTUs")
# comparison of the number of otus
ps_meta <- meta(ps_big)
otus_number <- otu_table(ps_big)@.Data %>% 
  as_tibble() %>% 
  map_int(., ~ sum(.x > 0))

otu_number_df <- tibble(
  sample = ps_meta$X.SampleID, 
  treatment = ps_meta$Treatment, 
  time = ps_meta$Time,
  otus_number = otus_number)

p_s2a <- ggplot(otu_number_df, aes(treatment, otus_number, fill = treatment)) +
  geom_boxplot() +
  facet_wrap(~time, labeller = labeller(time = function(x) paste0("Day:", x))) +
  theme_bw() +
  ylab("Number of OTUs") +
  xlab(NULL) +
  theme(legend.position = "none") +
  expand_limits(y = c(250, 1150)) +
  stat_compare_means(comparisons = list(c("CR", "HA")), label = "p.signif", tip.length = 0, label.y = 1100) +
  scale_fill_manual(values = c("#4DBBD5FF", "#E64B35FF"))
npg_colors_2 <- ggsci::pal_npg("nrc")(2) %>% rev()
ps_0 <- subset_samples(ps_big, Time == 0) %>% 
  prune_taxa(taxa_sums(.) > 0, .)
p_s2c <- generate_venn_diagram(ps_0) %>% 
   plot(
     fills = list(fill = npg_colors_2, alpha = 0.6),
     legend = list(side = "top", nrow = 1, ncol = 2),
     labels = FALSE,
     quantities = list(fontsize = 10)
    )
  

ps_14 <- subset_samples(ps_big, Time == 14) %>% 
  prune_taxa(taxa_sums(.) > 0, .)
p_s2d <- generate_venn_diagram(ps_14) %>% 
  plot(
    fills = list(fill = npg_colors_2, alpha = 0.6),
    legend = FALSE,
    labels = FALSE,
      quantities = list(fontsize = 10)
    )

ps_28 <- subset_samples(ps_big, Time == 28) %>% 
  prune_taxa(taxa_sums(.) > 0, .)
p_s2e <- generate_venn_diagram(ps_28) %>% 
  plot(
    fills = list(fill = npg_colors_2, alpha = 0.6),
    legend = FALSE,
    labels = FALSE, 
    quantities = list(fontsize = 10)
    )
p_s2 <- {p_s2a + p_s2b + plot_layout(nrow = 1, width = c(1.5, 1))} -
  {as_ggplot(p_s2c) + as_ggplot(p_s2d) + as_ggplot(p_s2e) +  
      plot_layout(nrow = 1)} +
  plot_layout(ncol = 1) + plot_annotation(tag_levels = "A")
ggsave("output/figure/figure_s2.pdf", p_s2, width = 10, height = 7)
p_s2
**Supplementary Figure S2**. Quality control of 16S rRNA V3-V4 reads. Number of OTUs **(A)** after quality filtering on day 0, 14 and 28. Wilcoxon test was used to compare CR and HA. **(B)** Rarefaction curves for all samples with X axis representing number of sequences and Y axis representing number of observed taxa. Venn diagram showing the number of OTUs exclusively identified in each group on day 0 **(C)**, 14 **(D)** and day 28 **(E)**.

Supplementary Figure S2. Quality control of 16S rRNA V3-V4 reads. Number of OTUs (A) after quality filtering on day 0, 14 and 28. Wilcoxon test was used to compare CR and HA. (B) Rarefaction curves for all samples with X axis representing number of sequences and Y axis representing number of observed taxa. Venn diagram showing the number of OTUs exclusively identified in each group on day 0 (C), 14 (D) and day 28 (E).

Composition shifts of gut microbiota in HA

No significant differences in microbiome diversity on day 0

No significant differences were found in microbiome diversity between CR and HA (Supplementary Figure S3)

alpha_diversity <- estimate_richness(ps_big)
alpha_diversity$sample <- row.names(alpha_diversity)
alpha_df <- full_join(ps_meta, alpha_diversity, 
  by = c("X.SampleID" = "sample"))

# day 0 附件
alpha_df_0 <- filter(alpha_df, grepl(0, X.SampleID)) %>% 
  select(one_of("Observed", "Shannon", "Simpson", "ACE", "Treatment")) %>% 
  gather("index", "value", -Treatment)

p_alpha_0 <- ggplot(alpha_df_0, aes(Treatment, value, fill = Treatment)) +
  geom_boxplot() +
  facet_wrap(~index, scales = "free_y", nrow = 1) + 
  theme_bw() +
  theme(legend.position = "none") +
  scale_fill_manual(values = npg_colors_2) +
  stat_compare_means(comparisons = list(c("CR", "HA")), 
    label = "p.signif", tip.length = 0) +
  # scale_x_discrete(labels = c("HA","control" = "CR")) +
  labs(x = NULL, y = "Alpha Diversity Measure")

Anosim and multi-response permutation procedure (MRPP) anylysis between different groups on day 0

## anosim, MRPP, bray wunifrac
pvalue_0 <- compare_groups(ps_0)
pvalue_0
## $Anosim
##    bray jaccard 
##   0.037   0.034 
## 
## $MRPP
##    bray jaccard 
##   0.061   0.064

Beta diversity

min_reads_0 <- min(sample_sums(ps_0))
ps_ev_0 = transform_sample_counts(ps_0, 
  function(x) min_reads_0 * x/sum(x))
pcoa_bray_0 <- ordinate(ps_ev_0, method = "PCoA", distance = "bray")
p_beta_bray_0 <- plot_ordination(ps_ev_0, pcoa_bray_0, 
  axes = c(1, 2),
  color = "Treatment") + 
  geom_point(size = 2) +
  theme_bw() + 
  scale_color_manual(labels = c(control = "CR", heat = "HA"), values = npg_colors_2) +
  theme(legend.title = element_blank(), legend.position = "none")
  # annotate("text",
  #   label = "bold(Anosim)~~Bray~italic(P)==0.46~~Jaccard~italic(P)==0.49", 
  #   x=0.25, y=-0.77, parse = TRUE, size = 3)

pcoa_jaccard_0 <- ordinate(ps_ev_0, method = "PCoA", distance = "jaccard")
p_beta_jaccard_0 <- plot_ordination(ps_ev_0, pcoa_jaccard_0,
  axes = c(1, 2),
  color = "Treatment") +
  geom_point(size = 2) +
  # stat_ellipse(level = 0.95) +
  # geom_text(aes(label = X.SampleID), nudge_x = 0.1, nudge_y = 0.001) +
  theme_bw() +
  scale_color_manual(labels = c(control = "CR", heat = "HA"), values = npg_colors_2) +
  theme(legend.title = element_blank())
  # annotate("text",
  #   label = "bold(MRPP)~~Bray~italic(P)==0.377~~Jaccard~italic(P)==0.368", 
  #   x=0, y=-0.78, parse = TRUE, size = 3)
p_s3 <- p_alpha_0 - {p_beta_bray_0 + p_beta_jaccard_0 + plot_layout(nrow = 1)} +
  plot_layout(nrow = 2, heights = c(1.4, 1)) + plot_annotation(tag_levels = "A")
ggsave("output/figure/figure_s3.pdf", p_s3, width = 7, height = 6)
p_s3
**Supplementary Figure S3.** Diversity analysis on day 0. (A) Alpha diversity assessed by richness (ACE, Observed), diversity (Shannon, Simpson). Boxes represent the interquartile ranges, and the inside black plots represent the median and circles are outliers. P values are from Wilcoxon rank sum test. Beta diversity assessed by Principal coordinate analysis (PCoA) based on the Bray-Curtis (B) and Jaccard (C) distances. P values are from Wilcoxon rank sum test. P values: ns, no significance P > 0.05.

Supplementary Figure S3. Diversity analysis on day 0. (A) Alpha diversity assessed by richness (ACE, Observed), diversity (Shannon, Simpson). Boxes represent the interquartile ranges, and the inside black plots represent the median and circles are outliers. P values are from Wilcoxon rank sum test. Beta diversity assessed by Principal coordinate analysis (PCoA) based on the Bray-Curtis (B) and Jaccard (C) distances. P values are from Wilcoxon rank sum test. P values: ns, no significance P > 0.05.

No significant differences in microbiome diversity on day 14

No significant differences were observed in alpha diversity indices on day 14 (Supplementary Figure S4 A, all P > 0.05, Wilcoxon sum test).

alpha_df_14 <- filter(alpha_df, grepl(14, X.SampleID)) %>% 
  select(one_of("Observed", "Shannon", "Simpson", "ACE", "Treatment")) %>% 
  gather("index", "value", -Treatment)

p_alpha_14 <- ggplot(alpha_df_14, aes(Treatment, value, fill = Treatment)) +
  geom_boxplot() +
  facet_wrap(~index, scales = "free_y", nrow = 1) + 
  theme_bw() +
  theme(legend.position = "none") +
  scale_fill_manual(values = npg_colors_2) +
  stat_compare_means(comparisons = list(c("CR", "HA")), 
    label = "p.signif", tip.length = 0) +
  # scale_x_discrete(labels = c("HA","control" = "CR")) +
  labs(x = NULL, y = "Alpha Diversity Measure")

Anosim and multi-response permutation procedure (MRPP) anylysis between different groups on day 14

## anosim, MRPP, bray wunifrac
pvalue_14 <- compare_groups(ps_14)
pvalue_14
## $Anosim
##    bray jaccard 
##   0.530   0.554 
## 
## $MRPP
##    bray jaccard 
##   0.532   0.489

Beta diverisy

min_reads_14 <- min(sample_sums(ps_14))
ps_ev_14 = transform_sample_counts(ps_14, 
  function(x) min_reads_14 * x/sum(x))
pcoa_bray_14 <- ordinate(ps_ev_14, method = "PCoA", distance = "bray")
p_beta_bray_14 <- plot_ordination(ps_ev_14, pcoa_bray_14, 
  axes = c(1, 2),
  color = "Treatment") + 
  geom_point(size = 2) +
  theme_bw() + 
  scale_color_manual(labels = c(control = "CR", heat = "HA"), values = npg_colors_2) +
  theme(legend.title = element_blank(), legend.position = "none")
  # annotate("text",
  #   label = "bold(Anosim)~~Bray~italic(P)==0.46~~Jaccard~italic(P)==0.49", 
  #   x=0.25, y=-0.77, parse = TRUE, size = 3)

pcoa_jaccard_14 <- ordinate(ps_ev_14, method = "PCoA", distance = "jaccard")
p_beta_jaccard_14 <- plot_ordination(ps_ev_14, pcoa_jaccard_14,
  axes = c(1, 2),
  color = "Treatment") +
  geom_point(size = 2) +
  # stat_ellipse(level = 0.95) +
  # geom_text(aes(label = X.SampleID), nudge_x = 0.1, nudge_y = 0.001) +
  theme_bw() +
  scale_color_manual(labels = c(control = "CR", heat = "HA"), values = npg_colors_2) +
  theme(legend.title = element_blank())
  # annotate("text",
  #   label = "bold(MRPP)~~Bray~italic(P)==0.377~~Jaccard~italic(P)==0.368", 
  #   x=0, y=-0.78, parse = TRUE, size = 3)

Microbiome diversity is significantly different on day 28

Alpha diversity showed a significant increase in the HA group compared to the control on day 28 (Figure 1A, all P < 0.05, Wilcoxon sum test).

alpha_df_28 <- filter(alpha_df, grepl(28, X.SampleID)) %>% 
  select(one_of("Observed", "Shannon", "Simpson", "ACE", "Treatment")) %>% 
  gather("index", "value", -Treatment)

p_alpha_28 <- ggplot(alpha_df_28, aes(Treatment, value, fill = Treatment)) +
  geom_boxplot() +
  facet_wrap(~index, scales = "free_y", nrow = 1) + 
  theme_bw() +
  theme(legend.position = "none") +
  scale_fill_manual(values = npg_colors_2) +
  stat_compare_means(comparisons = list(c("CR", "HA")), 
    label = "p.signif", tip.length = 0) +
  scale_x_discrete(labels = c("heat" = "HA","control" = "CR")) +
  labs(x = NULL, y = "Alpha Diversity Measure")

HA group were clearly separated from control group on day 28 (all P < 0.05 by Anosim and multi-response permutation procedure (MRPP) analysis, for Bray and Jaccard distances, respectively) (Figure 1B, C).

## anosim, MRPP, bray wunifrac
pvalue_28 <- compare_groups(ps_28)
pvalue_28
## $Anosim
##    bray jaccard 
##   0.002   0.004 
## 
## $MRPP
##    bray jaccard 
##   0.003   0.004
min_reads <- min(sample_sums(ps_28))
ps_ev_28 = transform_sample_counts(ps_28, function(x) min_reads * x/sum(x))

pcoa_bray_28 <- ordinate(ps_ev_28, method = "PCoA", distance = "bray")
p_beta_bray_28 <- plot_ordination(ps_ev_28, pcoa_bray_28, 
  axes = c(1, 2),
  color = "Treatment") + 
  geom_point(size = 2) +
  stat_ellipse(level = 0.95) + 
  # geom_text(aes(label = X.SampleID), nudge_x = 0.1, nudge_y = 0.001) + 
  theme_bw() + 
  scale_color_manual(labels = c(control = "CR", heat = "HA"), values = npg_colors_2) +
  theme(legend.title = element_blank(), legend.position = "none") +
  annotate("text",
    label = "bold(Anosim)~~Bray~italic(P)==0.003~~Jaccard~italic(P)==0.003", 
    x=-0.18, y=-0.77, parse = TRUE, size = 3)
  # annotation_custom(ggplotGrob(p_pvalue_28), xmin = -0.75, 
  #  xmax = -0.4, ymin = 0.4)
pcoa_jaccard_28 <- ordinate(ps_ev_28, method = "PCoA", distance = "jaccard")
p_beta_jaccard_28 <- plot_ordination(ps_ev_28, pcoa_jaccard_28,
  axes = c(1, 2),
  color = "Treatment") +
  geom_point(size = 2) +
  stat_ellipse(level = 0.95) +
  # geom_text(aes(label = X.SampleID), nudge_x = 0.1, nudge_y = 0.001) +
  theme_bw() +
  scale_color_manual(labels = c(control = "CR", heat = "HA"), values = npg_colors_2) +
  theme(legend.title = element_blank()) + 
  annotate("text",
    label = "bold(MRPP)~~Bray~italic(P)==0.001~~Jaccard~italic(P)==0.005", 
    x=-0.2, y=-0.78, parse = TRUE, size = 3)
p_f1 <- p_alpha_28 + {p_beta_bray_28 + p_beta_jaccard_28 + plot_layout(ncol = 2)} +
  plot_layout(nrow = 2, heights = c(1.3, 1)) + plot_annotation(tag_levels = "A")
ggsave("output/figure/figure_1.pdf", p_f1, width = 8, height = 7)
p_f1
**Figure 1.** Diversity analysis on day 28. (A) Alpha diversity assessed by richness (ACE, Observed), diversity (Shannon, Simpson). Boxes represent the interquartile ranges, and the inside black plots represent the median and circles are outliers. P values are from Wilcoxon rank sum test. Beta diversity assessed by Principal coordinate analysis (PCoA) based on the Bray-Curtis (B) and Jaccard (C) distances. Significant P-values of Anosim and multi-response permutation procedure (MRPP) between groups emphasize the differences in microbial community structure. P values: * P < 0.05, ** P < 0.01.

Figure 1. Diversity analysis on day 28. (A) Alpha diversity assessed by richness (ACE, Observed), diversity (Shannon, Simpson). Boxes represent the interquartile ranges, and the inside black plots represent the median and circles are outliers. P values are from Wilcoxon rank sum test. Beta diversity assessed by Principal coordinate analysis (PCoA) based on the Bray-Curtis (B) and Jaccard (C) distances. Significant P-values of Anosim and multi-response permutation procedure (MRPP) between groups emphasize the differences in microbial community structure. P values: * P < 0.05, ** P < 0.01.

Micriobiome compostion at phylum level

Phylum compostion of all samples

top_phylum <- aggregate_top_taxa(ps_big, 8, "Phylum") %>% 
  microbiome::transform("compositional")
p_composition <- plot_composition2(top_phylum) + scale_fill_npg() + 
  labs(x = NULL, y = "Relative abundance")
ggsave("output/figure/figure_2a.pdf", p_composition, width = 6, height = 6)
p_composition
**Figure 2A.** Relative abundance of bacterial at the phylum level.

Figure 2A. Relative abundance of bacterial at the phylum level.

Since population differences in the gut microbiota only observed between HA and CR on day 28, only subjects on day 28 were considered in the following analysis. To identify the taxa that were differentially represented in HA and CR subjects, we compared the relative abundances between these two groups at different taxonomic levels.

The dominant of the bacterial phyla identified in the fecal samples were encompassed by Firmicutes, Bacteroidetes, Proteobacteria, Cyanobacteria and Actinobacteria (Figure S4D).

There were subtle differences in bacterial community composition between HA and CR subjects (Supplementary Figure S4D)

p_mean_comosition <- subset_samples(top_phylum, Time == 28) %>% 
  plot_composition2(group = "group") +
  # scale_fill_manual(values = color[[1]]) +
  scale_x_discrete(labels = c("CR28" = "CR", "HA28" = "HA")) +
  scale_y_percent() + 
  theme_bw() +
  scale_fill_npg() + 
  labs(x = NULL, y = "Relative abundance") + 
  coord_flip() +
  theme(legend.position = "bottom")
p_s4 <- p_alpha_14 + 
  {p_beta_bray_14 + p_beta_jaccard_14 + plot_layout(ncol = 2)} + 
  p_mean_comosition + 
  plot_layout(nrow = 3, heights = c(1.4, 1, 0.8)) +
  plot_annotation(tag_levels = "A")
ggsave("output/figure/figure_s4.pdf", p_s4, width = 8, height = 10)
p_s4
**Supplementary Figure S4.** Diversity analysis on day 14. (A) Alpha diversity assessed by richness (ACE, Observed), diversity (Shannon, Simpson). Boxes represent the interquartile ranges, and the inside black plots represent the median and circles are outliers. P values are from Wilcoxon rank sum test. Beta diversity assessed by Principal coordinate analysis (PCoA) based on the Bray-Curtis (B) and Jaccard (C) distances. Significant P-values of Anosim and multi-response permutation procedure (MRPP) between groups emphasize the differences in microbial community structure. (D) Relative abundance of bacterial phyla. P values: ng, no significance P > 0.05.

Supplementary Figure S4. Diversity analysis on day 14. (A) Alpha diversity assessed by richness (ACE, Observed), diversity (Shannon, Simpson). Boxes represent the interquartile ranges, and the inside black plots represent the median and circles are outliers. P values are from Wilcoxon rank sum test. Beta diversity assessed by Principal coordinate analysis (PCoA) based on the Bray-Curtis (B) and Jaccard (C) distances. Significant P-values of Anosim and multi-response permutation procedure (MRPP) between groups emphasize the differences in microbial community structure. (D) Relative abundance of bacterial phyla. P values: ng, no significance P > 0.05.

However, no significant difference was found at phylum level (all P > 0.05, Wilcoxon sum test, Supplementary Table S1).

calculate_p <- function(abdlist) {
  t <- wilcox.test(abdlist[[1]], abdlist[[2]], paired = FALSE)
  return(t$p.value)
} 

phylum_28 <- aggregate_taxa(ps_28, "Phylum") %>% 
  microbiome::transform("compositional") %>% 
  psmelt()

phylum_p <- group_by(phylum_28, Phylum, Treatment) %>% 
  do(abd = .$Abundance) %>% 
  group_by(Phylum) %>% 
  do(abdlist = .$abd) %>% 
  mutate(p_value = calculate_p(abdlist))
phylum_p$q <- p.adjust(phylum_p$p_value, "BH")
  
mean_phylum <- group_by(phylum_28, Phylum, Treatment) %>% 
  summarise(mean = mean(Abundance)) %>% 
  spread(Treatment, mean)

diff_phylum <- inner_join(phylum_p, mean_phylum) %>% 
  arrange(q) %>% 
  mutate(`p-value` = round(p_value, 4), FDR = round(q, 4), 
    CR = round(CR, 4), HA = round(HA, 4)) %>% 
  select(Phylum, `p-value`, FDR, CR, HA)
write_csv(diff_phylum, "output/table/table_s1.csv")
diff_phylum

Microbiome composition at genus level

Genus level analysis showed that there were four significantly differential genus in HA subjects compared to CR subjects, including Blautia, Oscillospira, Lactobacillus, Allobaculum, lactobacillus, a common probiotic, was significantly increased in HA subjects (Supplementary Table S2).

genus <- aggregate_taxa(ps_28, "Genus") %>% 
  format_unique_genus() %>% 
  microbiome::transform("compositional") %>% 
  psmelt()

genus_p <- group_by(genus, Genus, Treatment) %>% 
  do(abd = .$Abundance) %>% 
  group_by(Genus) %>% 
  do(abdlist = .$abd) %>% 
  mutate(p_value = calculate_p(abdlist))
genus_p$fdr <- p.adjust(genus_p$p_value, "BH")

mean_genus <- group_by(genus, Genus, Treatment) %>% 
  summarise(mean = mean(Abundance)) %>% 
  spread(Treatment, mean)

diff_genus <- inner_join(genus_p, mean_genus) %>% 
  arrange(fdr) %>% 
  mutate(`p-value` = round(p_value, 4), FDR = round(fdr, 4), 
    CR = round(CR, 4), HA = round(HA, 4)) %>% 
  select(Genus, `p-value`, FDR, CR, HA)

write_csv(diff_genus, "output/table/table_s2.csv")
diff_genus

差异属的比值热图

sig_diff_genus <- filter(genus_p, 
  Genus %in% paste0("g__", c("Lactobacillus", "Oscillospira", "Blautia", 
    "Allobaculum", "Prevotella"))
)

# sig_diff_genus <- filter(genus_p, p_value < 0.05)
sig_genus <- bind_cols(map(sig_diff_genus$abdlist, unlist))
names(sig_genus) <- as.character(sig_diff_genus$Genus)

row.names(sig_genus) <- c(paste("C_", 1:8), paste("H_", 1:8))
sig_genus2 <- t(sig_genus)
sig_mean_c <- sig_genus2[, 1:8] %>% rowMeans()
sig_mean_h <- sig_genus2[, 9:16] %>% rowMeans()
sig_genus_fc1 <- sweep(sig_genus2[, 1:8], 1, sig_mean_h, FUN = "/")
sig_genus_fc2 <- sweep(sig_genus2[, 9:16], 1, sig_mean_c, FUN = "/")
sig_genus_fc <- cbind(sig_genus_fc1, sig_genus_fc2)

Heatmap(sig_genus_fc %>% log2, 
  col = c("steelblue", "#EEEEEE", "red"),
    #col = circlize::colorRamp2(c(-5, 0, 5), c("blue", "white", "red")),
    heatmap_legend_param = list(
      title = "Z score",
      title_position = "lefttop-rot"
))

The genera abundance distribution was quite discrepant to that of CRs (Figure 2C).

# relative abundance
otus_28 <- tax_glom(ps_ev_28, "Genus") %>% 
  otu_table()
otus_28 <- otus_28@.Data
d <- vegdist(otus_28)
cluster <- hclust(d)
tree <- treeio::as.phylo(cluster)
# genus info
tree_info <- tax_glom(ps_ev_28, "Genus") %>% tax_table() 
tree_info <- tree_info@.Data
tree_info_df <- data.frame(id = row.names(tree_info), tree_info)
p_composition_level <- levels(p_composition$data$Phylum)
tree_info_df$Phylum <- factor(tree_info_df$Phylum, 
  levels = p_composition_level[-1])
# p <- ggtree(tree, layout = "circular", branch.length = "none")
p_tree_phylum <- ggtree(tree, layout = "circular", branch.length = "none") %<+% 
  tree_info_df + 
  geom_tippoint(aes(color=Phylum)) +
  scale_color_npg() +
  theme(legend.position = "none")

# z score of genus
heatmap_data <- microbiome::transform(tax_glom(ps_ev_28, "Genus"), transform = "Z")
ht <- otu_table(heatmap_data)@.Data

exp_group <- ifelse(grepl("^C", colnames(ht)), TRUE, FALSE)
cr_zmean <- apply(ht[, exp_group], 1, mean)
ha_zmean <- apply(ht[, !exp_group], 1, mean)
nm <- row.names(ht)
ht_df <- data.frame(CR = cr_zmean, HA = ha_zmean)

p_heatmap_tree <- gheatmap(p_tree_phylum, ht_df, color = NULL, font.size = 2.5,
  colnames_position = "top",
  hjust = 0.4, width = 0.3) +
  scale_fill_gradient2(low="steelblue", mid = "#EEEEEE", high = "red", 
    breaks = c(-1, -0.5, 0, 0.5, 1), labels = c(-1, -0.5, 0, 0.5, 1)) +
  guides(color = FALSE) +
  # theme(legend.title = element_text(vjust = 0)) +
  guides(fill = guide_colorbar(
    title = "Z score",
    title.position = "left",
    # direction = "horizontal",
    barheight = 0.8
    # barwidth = 0.8
    )) +
  theme(legend.direction = "horizontal", 
    legend.position = c(0.5, 0.02),
    legend.title = element_text(vjust = 1))

ggsave("output/figure/figure_2b.pdf", p_heatmap_tree, width = 6, height = 6, units = "in")
p_heatmap_tree
**Figure 2C.** Heatmap tree shows genera significantly different in HA compare to those in CR group, and their phylogenic relationships on day 28. The Abundance profiles are expressed by z-scores.

Figure 2C. Heatmap tree shows genera significantly different in HA compare to those in CR group, and their phylogenic relationships on day 28. The Abundance profiles are expressed by z-scores.

To explore more precisely the taxa that were driving the differentiation of the microbiota of the groups, we performed differential analysis based on DESeq2 (Love et al., 2014), a method using negative binomial GLM to obtain maximum likelihood estimates between two conditions. Then the RA of taxa, which showed false discovery rate (FDR)-corrected P value < 0.05 with differential expression analysis conducted on whole microbiota profile, were expressed as heat map (Figure 2B, Supplementary Table S3).

diagdds = phyloseq_to_deseq2(ps_28, ~ Treatment)

gm_mean = function(x, na.rm=TRUE){
  exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x))
}
geoMeans = apply(counts(diagdds), 1, gm_mean)
diagdds = estimateSizeFactors(diagdds, geoMeans = geoMeans)
diagdds = DESeq(diagdds, fitType="local")
res = results(diagdds)
res = res[order(abs(res$padj), na.last=NA), ]
# res_fc = res[order(res$log2FoldChange, na.last=NA,  decreasing = TRUE), ]
alpha = 0.05
sigtab = res[(res$padj < alpha), ]
# sigtab_fc = res_fc[(res_fc$padj < alpha), ]
sigtab = cbind(as(sigtab, "data.frame"),
  as(tax_table(ps_28)[rownames(sigtab), ], "matrix"))

diff_otu <- row.names(sigtab)
otus <- otu_table(microbiome::transform(ps_28, "log10p"))
diff_otus <- otus[row.names(otus) %in% diff_otu[1:40], ]
diff_otus <- diff_otus@.Data
taxas <- tax_table(ps_28)@.Data
diff_taxas <- taxas[row.names(taxas) %in% row.names(diff_otus), ]
row.names(diff_otus) <- paste(diff_taxas[, "Phylum"], 
  diff_taxas[, "Family"], ";")
# pheatmap::pheatmap(diff_otus, annotation_col = sample_col, cutree_cols = 2)
p_heatmap <- Heatmap(diff_otus@.Data, column_split = 2, 
  column_title = NULL,
  col = colorRamp2(c(0,2,4), c("steelblue", "#EEEEEE", "red")),
  column_names_gp = gpar(fontsize = 9),
  row_names_gp = gpar(fontsize = 9),
  bottom_annotation = HeatmapAnnotation(
    fill = anno_block(gp = gpar(fill =c("grey24")),
    labels = c("CR", "HA"), 
    labels_gp = gpar(col = "white", fontface = "bold"))),
  heatmap_legend_param = list(
    x = unit(1, "cm"), y = unit(1, "cm"),
    title = "Log Abundance", 
    title_position = "leftcenter-rot",
    legend_height = unit(6, "cm")),
  
)

p_heatmap <- as_ggplot(grid.grabExpr(draw(p_heatmap)))
ggsave("output/figure/figure_2c.pdf", p_heatmap, width = 8, height = 12)
p_heatmap
**Figure 2B.** Hierarchical clustering with a heat map shows the RA of representative OTUs (those with greatest difference between HA and CR on day 28) group means from each OTU selected for P < 0.05, obtained with differential abundance analysis with DESeq2. The OTUs are shown as phylum and family.

Figure 2B. Hierarchical clustering with a heat map shows the RA of representative OTUs (those with greatest difference between HA and CR on day 28) group means from each OTU selected for P < 0.05, obtained with differential abundance analysis with DESeq2. The OTUs are shown as phylum and family.

st3 <- mutate(sigtab, 
  baseMean = round(baseMean, 4),
  log2FoldChange = round(log2FoldChange, 4),
  lfcSE = round(lfcSE, 4),
  stat = round(stat, 4),
  pvalue = formatC(pvalue, format = "e", digits = 4),
  padj = formatC(padj, format = "e", digits = 4)
  ) %>% 
  select(Genus, baseMean:padj)
st3$OTU <- row.names(sigtab)

st3 <- select(st3, OTU, Genus, baseMean, log2FoldChange, pvalue, padj)
write_tsv(st3, "output/table/figure_s3.tsv")
st3

Biomaker prediction using lefse

The following analysis inlcuding lefse OTU biomarker (using lefse) and functional prediction (usring picrust, lefse, and bugbase), is based on the taxa tables from qiime. Thus, we should convert the qiime taxa tables to the file that is compatible with lefse, picrust and bugbase.

You should downloaded greegene database from ftp://greengenes.microbio.me/greengenes_release/gg_13_5/gg_13_5_otus.tar.gz before run the folling script, unzip it and put file 97_otus.fasta under directory data/raw.

sh src/shell/qiime_to_lefse_picrust_bugbase.sh

Differentially abundant taxa (Supplementary Table S4) were further confirmed by LEfSe. Fist, generate the input file for lefse

qiime_lefse <- read_tsv("data/processed/summaryize_taxa_L6/mapping_new_L6.txt") %>% 
  select(Treatment, 
    `Unassigned|Other`:`k__Bacteria|p__Verrucomicrobia|c__Verrucomicrobiae|o__Verrucomicrobiales|f__Verrucomicrobiaceae|g__Akkermansia`) %>% 
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column('otu') %>% 
  filter(!grepl("(Other)|(__)$", otu)) # 删除未确定的taxa
  
write_tsv(qiime_lefse, "data/processed/lefse_qiime/lefse_otu_input.txt", col_names = FALSE)

lefse analysis

format_input.py data/processed/lefse_qiime/lefse_otu_input.txt \
output/lefse_qiime/lefse_qiime.in -c 1 -o 1000000

run_lefse.py output/lefse_qiime/lefse_qiime.in output/lefse_qiime/lefse_qiime_out.res

plot_cladogram.py output/lefse_qiime/lefse_qiime_out.res output/lefse_qiime/figure_3.pdf \
--dpi 300 --format pdf --title="" --right_space_prop 0.3 --label_font_size 8
include_graphics("output/lefse_qiime/figure_3.pdf")

lefse_otu_marker <- 
  read_tsv(
    "output/lefse_qiime/lefse_qiime_out.res", 
    col_names = c("otu", "mean_abd", "enrich_group", "lda", "p")
  ) %>% 
  filter(!is.na(enrich_group)) %>% 
  mutate(lda = round(lda, 4),
    p = round(as.numeric(p), 4)) %>% 
  select(otu, enrich_group, lda, p) %>% 
  arrange(desc(enrich_group), desc(lda))

write_tsv(lefse_otu_marker, "output/lefse_qiime/table_s4.tsv")

lefse_enrich <- 
  read_tsv(
    "output/lefse_qiime/lefse_qiime_out.res", 
    col_names = c("otu", "mean_abd", "enrich_group", "lda", "p")
  ) %>% 
  filter(!is.na(enrich_group))
write_tsv(lefse_enrich, "output/lefse_qiime/lefse_enriched.tsv", 
  col_names = FALSE)
lefse_otu_marker

Functional shifts of gut microbiota in HA

Functional prediction using PICRUSt

To infer the functional alternations of gut microbiota in HA, we employed PICRUSt (Langille et al., 2013) algorithm to predict the functional composition profiles from 16S rRNA gene-based microbial compositions.

# Normalize OTU Table
normalize_by_copy_number.py -i data/processed/picrust.biom \
-o output/picrust_kegg/normaolized_otus.biom

# Predict Functions For Metagenome
predict_metagenomes.py -i output/picrust_kegg/normaolized_otus.biom \
-o output/picrust_kegg/metagenome_predictions.biom

# convert to tab for pca 
biom convert -i output/picrust_kegg/metagenome_predictions.biom \
-o output/picrust_kegg/metagenome_predictions.txt --to-tsv

Overall, the microbial communities present in HA and CR could be distinguished based on their functions (Figure S4A).

picrust_ko <- read_tsv(
  "output/picrust_kegg/metagenome_predictions.txt", 
  skip = 1
)

pca_in <- select(picrust_ko, -`#OTU ID`) %>% 
  t()
subjects_name <- row.names(pca_in)
pca_color <- tibble(name = subjects_name, 
  color = ifelse(grepl("^HA", subjects_name), "HA", "CR"))

p_f4a <- autoplot(prcomp(pca_in), 
  data = pca_color, colour = "color") + 
  theme_bw() +
  scale_color_manual(values = npg_colors_2) +
  theme(legend.position = "none") 

Based on this, we then performed kegg pathway enrichment analysis using lefse.The thousands of predicted functions should be collapsed into higher categories (kegg pathway).

categorize_by_function.py -f -i output/picrust_kegg/metagenome_predictions.biom \
-c KEGG_Pathways -l 2 -o output/picrust_kegg/kegg_pathway_l2.txt

Format the kegg pathway to the file that is compatible with lefse.

picrust_kegg <- read_tsv("output/picrust_kegg/kegg_pathway_l2.txt", skip = 1) 
picrust_kegg$pathway <- str_replace_all(picrust_kegg$`#OTU ID`, " ", "_")
picrust_kegg$`#OTU ID` <- NULL
picrust_kegg$KEGG_Pathways <- NULL
picrust_kegg <- select(picrust_kegg, pathway, everything())

picrust_kegg <- select(picrust_kegg, pathway, everything())
lefse_class <- c("class", 
  ifelse(grepl("^C", names(picrust_kegg)[-1]), "CR", "HA")) %>% 
  matrix( ncol = ncol(picrust_kegg)) %>% 
  as.data.frame(stringsAsFactors = FALSE) %>% 
 set_names(names(picrust_kegg))
picrust_kegg <- rbind(lefse_class, picrust_kegg)
write_tsv(picrust_kegg, "output/picrust_kegg/lefse_kegg_in.txt", col_names = FALSE)

The predicted KEGG pathway significantly enriched in HA included cell motility, signal transduction, lipid metabolism, metabolism of other amino acids, neurodegenerative diseases, environmental adaption and transport and catabolism (Figure 4C, Supplementary Table S5).

format_input.py output/picrust_kegg/lefse_kegg_in.txt \
output/picrust_kegg/lefse_kegg.in -c 1 -o 1000000

run_lefse.py output/picrust_kegg/lefse_kegg.in \
output/picrust_kegg/lefse_kegg_out.res
lefse_pathway <- read_tsv(
  "output/picrust_kegg/lefse_kegg_out.res",
  col_names = c("pathway", "mean_abd", "enrich_group", "lda", "p")
)

enrich_pathway <- filter(lefse_pathway, !is.na(enrich_group))

# pathway ordered according to lda in different groups
pathway_order <- arrange(enrich_pathway, 
  desc(enrich_group), desc(lda))$pathway
p_f4c <- ggplot(enrich_pathway, aes(pathway, lda)) +
  geom_segment(aes(xend = pathway, yend = 0, colour = enrich_group), size = 8) +
  theme_bw() +
  scale_color_manual(values = npg_colors_2) +
  labs(y = "LDA SCORE (log10)", x = "KEGG pathway") +
  guides(colour = guide_legend(title = NULL)) +
  theme(legend.position = c(0.8, 0.87), legend.direction = "horizontal",
    axis.text.x = element_text(angle = -90, hjust = 0)) +
  scale_x_discrete(limits = pathway_order) +
  scale_y_continuous(expand = c(0, 0))

pathway enrichment analysis using lefse

lefse_pathway_marker <- 
  read_tsv(
    "output/picrust_kegg/lefse_kegg_out.res", 
    col_names = c("otu", "mean_abd", "enrich_group", "lda", "p")
  ) %>% 
  filter(!is.na(enrich_group)) %>% 
  mutate(lda = round(lda, 4),
    p = round(as.numeric(p), 4)) %>% 
  select(otu, enrich_group, lda, p) %>% 
  arrange(desc(enrich_group), desc(lda))

write_tsv(lefse_pathway_marker, "output/picrust_kegg//table_s5.tsv")
lefse_pathway_marker

Phynotypic prediction using Bubase

We predicted nine potential phenotypes, including aerobic, anaerobic, containing mobile elements, facultatively anaerobic, biofilm forming, Gram negative, Gram positive, potentially pathogenic, and stress tolerant.

Generate the bugbase mapping file

bugbase_mapping <- meta(ps_28) %>% mutate(`#SampleID` = X.SampleID)
bugbase_mapping <- select(bugbase_mapping, `#SampleID`, Treatment)

write_tsv(bugbase_mapping, "output/bugbase/bugbase_mapping.txt")

Run bugbase

run.bugbase.r -i data/processed/bugbase_json.biom  \
-m output/bugbase/bugbase_mapping.txt -c Treatment \
-o output/bugbase/bugbase_output_class_T0.001 -T 0.001 -t 3

Among all the phenotypes, the relative abundance of the Aerobic was significantly increased and the stress tolerant was significantly decreased after HA (Figure 4B).

bugbase_predictions <- read_tsv(
  "output/bugbase/bugbase_output_class_T0.001/predicted_phenotypes/predictions.txt"
) 
mappings <- read_tsv("output/bugbase/bugbase_mapping.txt")

phynotypes <- c(
  "Aerobic",
  "Anaerobic", 
  "Contains_Mobile_Elements",
  "Facultatively_Anaerobic",
  "Forms_Biofilms",
  "Gram_Negative",
  "Gram_Positive",
  "Potentially_Pathogenic",
  "Stress_Tolerant"
)

extract_bugbase_p <- function(file) {
  bugbase_stat <- read_tsv(file, col_names = FALSE)
  
  return(as.numeric(bugbase_stat[nrow(bugbase_stat), 1]))
}

files <- paste0("output/bugbase/bugbase_output_class_T0.001/predicted_phenotypes/",
  phynotypes, "_stats.txt")
phynotypes_p <- map_dbl(files, extract_bugbase_p)
names(phynotypes_p) <- phynotypes
  
signf_phenotype <- phynotypes_p[phynotypes_p <= 0.05]
bugbase_signf <- bugbase_predictions[c("X1", names(signf_phenotype))]



bugbase_signf <- full_join(mappings, bugbase_signf, 
  by = c("#SampleID" = "X1")) %>% 
  gather("phenotype", "value", Aerobic:Stress_Tolerant)


# different limits in different facet
my_limits_df <- data.frame(Treatment = NA,
  phenotype = rep(names(signf_phenotype), each =2),
  value = c(0, 0.15, 0.98, 1.001))

my_pvalue_df <- data.frame(
  Treatment = NA,
  phenotype = c(names(signf_phenotype)),
  start = rep("CR",2), end = rep("HA", 2),
  y = c(0.085, 1.0005),
  label = rep("*", 2),
  tip = c(0.002, 0.01)
)

p_bugbase <- ggplot(bugbase_signf, aes(x = Treatment, y = value, fill = Treatment)) +
  geom_boxplot() +  
  geom_signif(
    data = my_pvalue_df,
    aes(xmin = start, xmax = end, annotations = label, y_position = y, tip_length = tip),
    textsize = 3, vjust = -0.2, manual = TRUE
  ) + 
  geom_blank(data = my_limits_df, aes(Treatment, value)) +
  facet_wrap(~phenotype, scales = "free_y") +
  scale_fill_manual(values = npg_colors_2) +
  theme_bw() + 
  labs(x = NULL, y = "Relative abundance") +
  theme(legend.position = "none")
# ggsave("output/bugbase/figure_4.pdf", p_bugbase, width = 5, height = 4)
# p_bugbase
# 
# {p_s5a + p_bugbase + plot_layout(ncol = 2)} + p_s5b
p_f4 <- (p_f4a + p_bugbase + plot_layout(nrow = 1, widths = c(0.8, 1))) -
  p_f4c + plot_layout(ncol = 1, heights = c(1, 1)) +
  plot_annotation(tag_levels = "A")
ggsave("output/figure/figure_4.pdf", p_f4, width = 6.5, height = 8)
p_f4
**Figure 4.** Functional analysis on day 28. (A) Principal component analysis (PCA) plot comparing the KEGG ontology (KO) between HA and CR subjects. KO was predicted using PICRUSt. (B) Phenotypic prediction using BugBase. Among all the phenotypes, the relative abundance of the aerobic was significantly increased and the stress tolerant was significantly decreased after HA subjects. Statistical significance was determined by Wilcoxon rank sum tests with FDR correction. P value: * P < 0.05. (C) Predicted KEGG Pathways differentially abundant between HA and CR subjects.

Figure 4. Functional analysis on day 28. (A) Principal component analysis (PCA) plot comparing the KEGG ontology (KO) between HA and CR subjects. KO was predicted using PICRUSt. (B) Phenotypic prediction using BugBase. Among all the phenotypes, the relative abundance of the aerobic was significantly increased and the stress tolerant was significantly decreased after HA subjects. Statistical significance was determined by Wilcoxon rank sum tests with FDR correction. P value: * P < 0.05. (C) Predicted KEGG Pathways differentially abundant between HA and CR subjects.

# mapping <- read_tsv("data/raw/mapping.txt")
# selected_reads_sample_name <- mapping$InputFileName %>%
#   tools::file_path_sans_ext()
# selected_reads_file_name <- paste0(selected_reads_sample_name, ".fna")
# 
# reads_path <- "/P101SC18030731-01-B8-3_result/01.CleanData"
# # reads_sample_name <- list.dirs(reads_path, full.names = FALSE, recursive = FALSE)
# selected_reads_file <- file.path(
#   reads_path,
#   selected_reads_sample_name,
#   selected_reads_file_name
# )
# reads_file_to <- file.path(
#   "data/raw/reads",
#   map_chr(
#     selected_reads_file_name,
#     ~ ifelse(str_detect(.x, "^HA"), # sample name transform
#       str_replace(.x, "^HA", "CR"),
#       str_replace(.x, "^CR", "HA")
#     )
#   )
# )
# # file.copy(selected_reads_file, reads_file_to)
# mapping$InputFileName <- basename(reads_file_to)
# # write_tsv(mapping, "data/raw/mapping_new.txt")

HA harbors a modified microbial ecological network

To examine the changes on HA microbial community structure and how HA affects the microbial interactions, we utilized SPIEC-EASI33 to infer two microbial ecological networks of HA and CR subjects. In general, there were more positive correlations than negative correlations in both ecological networks.

# To minimize the interference of low confidence OTUs, OTUs that were less than 100 reads over all samples or present in less 30% of the samples were filter out. 
ps_28 <- subset_samples(ps_big, Time == 28)
otu <- otu_table(ps_28)
otu <- otu[rowSums(otu > 0) > 5, ]
otu <- otu[rowSums(otu) > 100, ]
otu_table(ps_28) <- otu

ps_ha28 <- subset_samples(ps_28, Treatment == "HA")
ps_cr28 <- subset_samples(ps_28, Treatment == "CR")

cor_ha28 <- spiec.easi(ps_ha28, method='mb', lambda.min.ratio=1e-2, nlambda=10,
  icov.select.params=list(rep.num=100, ncores=2))
net_ha28 <- make_net(ps_ha28, cor_ha28)

cor_cr28 <- spiec.easi(ps_cr28, method='mb', lambda.min.ratio=1e-2, nlambda=10,
  icov.select.params=list(rep.num=100, ncores=2))
net_cr28 <- make_net(ps_cr28, cor_cr28)

The degree distributions of the vertices were similar in the two networks (Figure 5C).

dd_ha28 <- degree_distribution(net_ha28, cumulative = FALSE)
dd_cr28 <- degree_distribution(net_cr28, cumulative = FALSE)

n_dd_ha28 <- length(dd_ha28)
n_dd_cr28 <- length(dd_cr28)
dd_tibble <- tibble(
  x = c(0:(n_dd_ha28 - 1), 0:(n_dd_cr28 - 1)),
  y = c(dd_ha28, dd_cr28),
  state = rep(c("HA", "CR"), c(n_dd_ha28, n_dd_cr28))
)

p_dd <- ggplot(dd_tibble, aes(x, y, color = state)) +
  geom_point() + 
  geom_line() +
  theme_bw() +
  scale_color_manual(values = npg_colors_2) +
  labs(x = "Degree", y = "Frequency") +
  theme(legend.title = element_blank(), legend.position = c(0.87, 0.87)) 

We then estimated the natural connectivity, a general metric to measure robustness of network against attacks by sequentially removing hubs (nodes with high degree centrality). The HA network was more robust than the CR network by removing high degree nodes. (Fig. 5D)

nc_ha28_degree <- nc_attack(net_ha28,method = "degree")
## 
 Progress: ─────────────────────────────────────────────────────────────────────────────────      100%
 Progress: ─────────────────────────────────────────────────────────────────────────────────      100%
 Progress: ───────────────────────────────────────────────────────────────────────────────────    100%
 Progress: ─────────────────────────────────────────────────────────────────────────────────────  100%
 Progress: ────────────────────────────────────────────────────────────────────────────────────── 100%
nc_cr28_degree <- nc_attack(net_cr28, method = "degree")
## 
 Progress: ───────────────────────────────────────────────────────────────────────────────────    100%
 Progress: ─────────────────────────────────────────────────────────────────────────────────────  100%
 Progress: ────────────────────────────────────────────────────────────────────────────────────── 100%
nc_tibble <- bind_rows(
  make_nc_tibble(nc_ha28_degree, "HA", "degree"),
  make_nc_tibble(nc_cr28_degree, "CR", "degree"),
)
p_nc <- ggplot(nc_tibble, aes(x, y, color = state)) +
  geom_line() +
  theme_bw() + 
  scale_color_manual(values = npg_colors_2) +
  theme(legend.position = c(0.87, 0.87), legend.title = element_blank()) +
  labs(x = "Percentage of removed nodes", y = "Natural connectivity") 

In general, there were more positive correlations than negative correlations in both ecological networks (Fig. 5A,B, Supplementary Table S6).

# import svg
rsvg_svg("data/net_cytoscape/ha28.svg", "data/net_cytoscape/ha28_cairo.svg")
rsvg_svg("data/net_cytoscape/cr28.svg", "data/net_cytoscape/cr28_cairo.svg")

ha_picture <- grImport2::readPicture("data/net_cytoscape/ha28_cairo.svg")
cr_picture <- grImport2::readPicture("data/net_cytoscape/cr28_cairo.svg")

# legend
cols <- ggsci::pal_npg(palette = c("nrc"), alpha = 1)(10) %>% 
  stringr::str_sub(1, 7) %>% 
  rev

phylums <- unique(vertex_attr(net_ha28, "Phylum"))
phylums <- c(setdiff(phylums, "Unclassified"), "Unclassified") 

lgd <- ComplexHeatmap::Legend(at = sort(phylums), title = "Phylum",
  title_position = "leftcenter",
  legend_gp = gpar(fill = cols),
  direction = "horizontal",
  nrow = 2
)

# generate figure 5
generate_figure5 <- function(ha_picture, cr_picture, legend, p_dd, p_nc) {
  tag_text <- textGrob("A", gp = gpar(cex = 1.2))
  grid.newpage()
  g_lay <- grid.layout(5,4, 
    widths = unit.c(
      unit(1.5, "grobwidth", tag_text), unit(1, "null"),
      unit(1.5, "grobwidth", tag_text), unit(1, "null")), 
    heights = unit.c(
      unit(1.5, "grobheight", tag_text),
      unit(1, "null"),
      unit(5, "grobheight", tag_text),
      unit(1.5, "grobheight", tag_text), 
      unit(1, "null")
    )
  )
  pushViewport(viewport(layout = g_lay))

  pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 1, name = "tag_1"))
  grid.text("A", gp = gpar(cex = 1.2))
  upViewport()
  
  pushViewport(viewport(layout.pos.row = 2, layout.pos.col = c(1,2), name = "ha_net"))
  grImport2::grid.picture(ha_picture, expansion = 0, ext = "clipbbox", width = unit(1, "npc"))
  upViewport()
  
  pushViewport(viewport(layout.pos.row = 1, layout.pos.col = 3, name = "tag_2"))
  grid.text("B", gp = gpar(cex = 1.2))
  upViewport()

  pushViewport(viewport(layout.pos.row = 2, layout.pos.col = c(3,4), name = "cr_net"))
  grImport2::grid.picture(cr_picture, expansion = 0, ext = "clipbbox", width = unit(1, "npc"))
  upViewport()
  
  pushViewport(viewport(layout.pos.row = 3, name = "legend"))
  draw(lgd)
  upViewport()

  pushViewport(viewport(layout.pos.row = 4, layout.pos.col = 1, name = "tag_3"))
  grid.text("C", gp = gpar(cex = 1.2))
  upViewport()
  
  pushViewport(viewport(layout.pos.row = 4, layout.pos.col = 3, name = "tag_4"))
  grid.text("D", gp = gpar(cex = 1.2))
  upViewport()

  pushViewport(viewport(layout.pos.row = 5, layout.pos.col = c(1,2), name = "degree"))
  grid.draw(ggplotGrob(p_dd))
  upViewport()
  
  pushViewport(viewport(layout.pos.row = 5, layout.pos.col = c(3,4), name = "robust"))
  grid.draw(ggplotGrob(p_nc))
}
generate_figure5(ha_picture, cr_picture, lgd, p_dd, p_nc)
**Figure 5.** Ecological networks inferred using SPIEC-EASI in HA (A) and CR (B) group. Nodes represents OTUs and colored by its phylum; red edges represent positive correlations and green edges represent negative correlations. (C) Degree distributions of the two networks are similar. (D) Natural Connectivity was used to measure the robustness of networks by sequentially removing high degree nodes. The result showed that HA network was more robust than CR network.

Figure 5. Ecological networks inferred using SPIEC-EASI in HA (A) and CR (B) group. Nodes represents OTUs and colored by its phylum; red edges represent positive correlations and green edges represent negative correlations. (C) Degree distributions of the two networks are similar. (D) Natural Connectivity was used to measure the robustness of networks by sequentially removing high degree nodes. The result showed that HA network was more robust than CR network.

pdf("output/figure/figure_f5.pdf", height = 8, width = 8)
generate_figure5(ha_picture, cr_picture, lgd, p_dd, p_nc)
dev.off()
## png 
##   2

Supplementary Table S6

edge_ha28 <- igraph::as_data_frame(net_ha28)
vertex_ha28 <- igraph::as_data_frame(net_ha28, "vertices")

edge_ha28$from_genus <- vertex_ha28$Genus[match(edge_ha28[, 1], vertex_ha28$name)]
edge_ha28$to_genus <- vertex_ha28$Genus[match(edge_ha28[, 2], vertex_ha28$name)]

edge_ha28$color <- NULL
edge_ha28 <- dplyr::rename(edge_ha28, `spiec-easi correlation` = weight) %>% 
  mutate( 
    from_genus = ifelse(from_genus != "g__", from_genus, NA),
    to_genus = ifelse(to_genus != "g__", to_genus, NA),
    type = ifelse( `spiec-easi correlation` > 0, "positive", "negative")) %>% 
  mutate(`intra genus` = ifelse(from_genus == to_genus, "true", "false"))
edge_ha_intra_genus <- filter(edge_ha28, from_genus == to_genus)
edge_ha_inter_genus <- filter(edge_ha28, from_genus != to_genus)
  
edge_cr28 <- igraph::as_data_frame(net_cr28)
vertex_cr28 <- igraph::as_data_frame(net_cr28, "vertices")
edge_cr28$from_genus <- vertex_cr28$Genus[match(edge_cr28[, 1], vertex_cr28$name)]
edge_cr28$to_genus <- vertex_cr28$Genus[match(edge_cr28[, 2], vertex_cr28$name)]

edge_cr28$color <- NULL
edge_cr28 <- dplyr::rename(edge_cr28, `spiec-easi correlation` = weight) %>% 
  mutate( 
    from_genus = ifelse(from_genus != "g__", from_genus, NA),
    to_genus = ifelse(to_genus != "g__", to_genus, NA),
    type = ifelse( `spiec-easi correlation` > 0, "positive", "negative")) %>% 
  mutate(`intra genus` = ifelse(from_genus == to_genus, "true", "false"))

write_xlsx(list(`HA network` = edge_ha28, `CR network` = edge_cr28), 
  path = "output/table/table_s6.xlsx")

We then compared the degrees of OTUs in the four significantly different genera Blautia, Oscillospira, Lactobacillus, Allobaculum. The degree of Lactobacillus in HA network was significantly lower than in CR network (P = 0.0396, Wilcoxon sum test, FDR corrected), while degrees of OTUs in other three genera were similar in the two networks (Supplementary Fig. S5).

degree_cr28 <- igraph::degree(net_cr28, normalized = TRUE)
degree_ha28 <- igraph::degree(net_ha28, normalized = TRUE)
degree_df <- tibble(
  degree = c(degree_cr28, degree_ha28),
  genus = c(vertex_cr28$Genus, vertex_ha28$Genus),
  state = rep(c("CR", 'HA'), each = length(degree_cr28))
)

marker_genus <- filter(degree_df, genus %in%
    c("g__Blautia", "g__Oscillospira", "g__Lactobacillus", "g__Allobaculum")
)
p_s5 <- ggplot(marker_genus, aes(state, degree, fill = state)) +
  geom_boxplot() +
  facet_wrap(~genus, ncol = 4) +
  ggpubr::stat_compare_means(
    comparisons = list(c("CR", "HA")), label = "p.signif") +
  theme_bw() +
  theme(legend.position = "none") +
  scale_fill_manual(values = npg_colors_2) +
  labs(x = NULL, y = "Degree")
p_s5
**Figure S5.** The degrees of OTUs in the four significant different genera of inferred ecological networks. P values are from Wilcoxon rank sum test. P value: * P < 0.05; ns, no significance P > 0.05.

Figure S5. The degrees of OTUs in the four significant different genera of inferred ecological networks. P values are from Wilcoxon rank sum test. P value: * P < 0.05; ns, no significance P > 0.05.

ggsave("output/figure/figure_s5.pdf", p_s5, width = 7, height = 4)

SessionInfo

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] grid      parallel  stats4    tools     stats     graphics  grDevices
##  [8] utils     datasets  methods   base     
## 
## other attached packages:
##  [1] writexl_1.1                 furrr_0.1.0                
##  [3] future_1.14.0               grImport2_0.1-5            
##  [5] rsvg_1.3                    igraph_1.2.4.1             
##  [7] SpiecEasi_1.0.6             ggsignif_0.5.0             
##  [9] ggfortify_0.4.7             circlize_0.4.6             
## [11] treeio_1.8.1                ggtree_1.16.3              
## [13] ComplexHeatmap_2.0.0        DESeq2_1.24.0              
## [15] SummarizedExperiment_1.14.0 DelayedArray_0.10.0        
## [17] BiocParallel_1.18.0         matrixStats_0.54.0         
## [19] Biobase_2.44.0              GenomicRanges_1.36.0       
## [21] GenomeInfoDb_1.20.0         IRanges_2.18.1             
## [23] S4Vectors_0.22.0            BiocGenerics_0.30.0        
## [25] hrbrthemes_0.7.1            vegan_2.5-5                
## [27] lattice_0.20-38             permute_0.9-5              
## [29] patchwork_0.0.1             ggpubr_0.2.1               
## [31] magrittr_1.5                eulerr_5.1.0               
## [33] microbiome_1.6.0            phyloseq_1.28.0            
## [35] ggsci_2.9                   forcats_0.4.0              
## [37] stringr_1.4.0               dplyr_0.8.3                
## [39] purrr_0.3.2                 readr_1.3.1                
## [41] tidyr_1.0.0                 tibble_2.1.3               
## [43] ggplot2_3.2.1               tidyverse_1.2.1            
## [45] knitr_1.23                 
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1           backports_1.1.4        Hmisc_4.2-0           
##   [4] VGAM_1.1-1             plyr_1.8.4             lazyeval_0.2.2        
##   [7] polylabelr_0.1.0       splines_3.6.0          listenv_0.7.0         
##  [10] digest_0.6.20          foreach_1.4.7          htmltools_0.3.6       
##  [13] checkmate_1.9.4        memoise_1.1.0          cluster_2.0.8         
##  [16] globals_0.12.4         Biostrings_2.52.0      annotate_1.62.0       
##  [19] modelr_0.1.4           extrafont_0.17         extrafontdb_1.0       
##  [22] jpeg_0.1-8             colorspace_1.4-1       blob_1.2.0            
##  [25] rvest_0.3.4            haven_2.1.1            xfun_0.8              
##  [28] crayon_1.3.4           RCurl_1.95-4.12        jsonlite_1.6          
##  [31] genefilter_1.66.0      zeallot_0.1.0          survival_2.44-1.1     
##  [34] iterators_1.0.12       ape_5.3                glue_1.3.1            
##  [37] polyclip_1.10-0        gtable_0.3.0           zlibbioc_1.30.0       
##  [40] XVector_0.24.0         GetoptLong_0.1.7       Rhdf5lib_1.6.0        
##  [43] Rttf2pt1_1.3.7         shape_1.4.4            scales_1.0.0          
##  [46] DBI_1.0.0              Rcpp_1.0.2             xtable_1.8-4          
##  [49] htmlTable_1.13.1       clue_0.3-57            tidytree_0.2.5        
##  [52] foreign_0.8-71         bit_1.1-14             Formula_1.2-3         
##  [55] htmlwidgets_1.3        httr_1.4.0             pulsar_0.3.5          
##  [58] RColorBrewer_1.1-2     ellipsis_0.2.0.1       acepack_1.4.1         
##  [61] pkgconfig_2.0.2        XML_3.98-1.20          nnet_7.3-12           
##  [64] locfit_1.5-9.1         labeling_0.3           tidyselect_0.2.5      
##  [67] rlang_0.4.0            reshape2_1.4.3         AnnotationDbi_1.46.0  
##  [70] munsell_0.5.0          cellranger_1.1.0       cli_1.1.0             
##  [73] generics_0.0.2         RSQLite_2.1.2          ade4_1.7-13           
##  [76] broom_0.5.2            evaluate_0.14          biomformat_1.12.0     
##  [79] yaml_2.2.0             bit64_0.9-7            nlme_3.1-139          
##  [82] xml2_1.2.0             compiler_3.6.0         rstudioapi_0.10       
##  [85] png_0.1-7              huge_1.3.2             geneplotter_1.62.0    
##  [88] stringi_1.4.3          highr_0.8              gdtools_0.1.9         
##  [91] Matrix_1.2-17          multtest_2.40.0        vctrs_0.2.0           
##  [94] pillar_1.4.2           lifecycle_0.1.0        GlobalOptions_0.1.0   
##  [97] cowplot_1.0.0          data.table_1.12.2      bitops_1.0-6          
## [100] R6_2.4.0               latticeExtra_0.6-28    gridExtra_2.3         
## [103] codetools_0.2-16       MASS_7.3-51.1          assertthat_0.2.1      
## [106] rhdf5_2.28.0           rjson_0.2.20           withr_2.1.2           
## [109] GenomeInfoDbData_1.2.1 mgcv_1.8-28            hms_0.5.0             
## [112] rpart_4.1-15           rvcheck_0.1.3          rmarkdown_1.14        
## [115] Rtsne_0.15             lubridate_1.7.4        base64enc_0.1-3